In [1]:
import pandas as pd
import numpy as np
%matplotlib inline
from sklearn import metrics
In [2]:
from sklearn.naive_bayes import GaussianNB
In [3]:
wine_df= pd.read_csv("WineQT.csv")
In [4]:
wine_df.head()
Out[4]:
fixed acidity volatile acidity citric acid residual sugar chlorides free sulfur dioxide total sulfur dioxide density pH sulphates alcohol quality Id
0 7.4 0.70 0.00 1.9 0.076 11.0 34.0 0.9978 3.51 0.56 9.4 5 0
1 7.8 0.88 0.00 2.6 0.098 25.0 67.0 0.9968 3.20 0.68 9.8 5 1
2 7.8 0.76 0.04 2.3 0.092 15.0 54.0 0.9970 3.26 0.65 9.8 5 2
3 11.2 0.28 0.56 1.9 0.075 17.0 60.0 0.9980 3.16 0.58 9.8 6 3
4 7.4 0.70 0.00 1.9 0.076 11.0 34.0 0.9978 3.51 0.56 9.4 5 4
In [5]:
wine_df.describe()
Out[5]:
fixed acidity volatile acidity citric acid residual sugar chlorides free sulfur dioxide total sulfur dioxide density pH sulphates alcohol quality Id
count 1143.000000 1143.000000 1143.000000 1143.000000 1143.000000 1143.000000 1143.000000 1143.000000 1143.000000 1143.000000 1143.000000 1143.000000 1143.000000
mean 8.311111 0.531339 0.268364 2.532152 0.086933 15.615486 45.914698 0.996730 3.311015 0.657708 10.442111 5.657043 804.969379
std 1.747595 0.179633 0.196686 1.355917 0.047267 10.250486 32.782130 0.001925 0.156664 0.170399 1.082196 0.805824 463.997116
min 4.600000 0.120000 0.000000 0.900000 0.012000 1.000000 6.000000 0.990070 2.740000 0.330000 8.400000 3.000000 0.000000
25% 7.100000 0.392500 0.090000 1.900000 0.070000 7.000000 21.000000 0.995570 3.205000 0.550000 9.500000 5.000000 411.000000
50% 7.900000 0.520000 0.250000 2.200000 0.079000 13.000000 37.000000 0.996680 3.310000 0.620000 10.200000 6.000000 794.000000
75% 9.100000 0.640000 0.420000 2.600000 0.090000 21.000000 61.000000 0.997845 3.400000 0.730000 11.100000 6.000000 1209.500000
max 15.900000 1.580000 1.000000 15.500000 0.611000 68.000000 289.000000 1.003690 4.010000 2.000000 14.900000 8.000000 1597.000000
In [6]:
wine_df.describe().transpose()
Out[6]:
count mean std min 25% 50% 75% max
fixed acidity 1143.0 8.311111 1.747595 4.60000 7.10000 7.90000 9.100000 15.90000
volatile acidity 1143.0 0.531339 0.179633 0.12000 0.39250 0.52000 0.640000 1.58000
citric acid 1143.0 0.268364 0.196686 0.00000 0.09000 0.25000 0.420000 1.00000
residual sugar 1143.0 2.532152 1.355917 0.90000 1.90000 2.20000 2.600000 15.50000
chlorides 1143.0 0.086933 0.047267 0.01200 0.07000 0.07900 0.090000 0.61100
free sulfur dioxide 1143.0 15.615486 10.250486 1.00000 7.00000 13.00000 21.000000 68.00000
total sulfur dioxide 1143.0 45.914698 32.782130 6.00000 21.00000 37.00000 61.000000 289.00000
density 1143.0 0.996730 0.001925 0.99007 0.99557 0.99668 0.997845 1.00369
pH 1143.0 3.311015 0.156664 2.74000 3.20500 3.31000 3.400000 4.01000
sulphates 1143.0 0.657708 0.170399 0.33000 0.55000 0.62000 0.730000 2.00000
alcohol 1143.0 10.442111 1.082196 8.40000 9.50000 10.20000 11.100000 14.90000
quality 1143.0 5.657043 0.805824 3.00000 5.00000 6.00000 6.000000 8.00000
Id 1143.0 804.969379 463.997116 0.00000 411.00000 794.00000 1209.500000 1597.00000
In [7]:
pip install seaborn matplotlib
Requirement already satisfied: seaborn in c:\users\amrendra mishra\anaconda3\lib\site-packages (0.12.2)
Requirement already satisfied: matplotlib in c:\users\amrendra mishra\anaconda3\lib\site-packages (3.7.2)
Requirement already satisfied: numpy!=1.24.0,>=1.17 in c:\users\amrendra mishra\anaconda3\lib\site-packages (from seaborn) (1.24.3)
Requirement already satisfied: pandas>=0.25 in c:\users\amrendra mishra\anaconda3\lib\site-packages (from seaborn) (2.0.3)
Requirement already satisfied: contourpy>=1.0.1 in c:\users\amrendra mishra\anaconda3\lib\site-packages (from matplotlib) (1.0.5)
Requirement already satisfied: cycler>=0.10 in c:\users\amrendra mishra\anaconda3\lib\site-packages (from matplotlib) (0.11.0)
Requirement already satisfied: fonttools>=4.22.0 in c:\users\amrendra mishra\anaconda3\lib\site-packages (from matplotlib) (4.25.0)
Requirement already satisfied: kiwisolver>=1.0.1 in c:\users\amrendra mishra\anaconda3\lib\site-packages (from matplotlib) (1.4.4)
Requirement already satisfied: packaging>=20.0 in c:\users\amrendra mishra\anaconda3\lib\site-packages (from matplotlib) (23.1)
Requirement already satisfied: pillow>=6.2.0 in c:\users\amrendra mishra\anaconda3\lib\site-packages (from matplotlib) (9.4.0)
Requirement already satisfied: pyparsing<3.1,>=2.3.1 in c:\users\amrendra mishra\anaconda3\lib\site-packages (from matplotlib) (3.0.9)
Requirement already satisfied: python-dateutil>=2.7 in c:\users\amrendra mishra\anaconda3\lib\site-packages (from matplotlib) (2.8.2)
Requirement already satisfied: pytz>=2020.1 in c:\users\amrendra mishra\anaconda3\lib\site-packages (from pandas>=0.25->seaborn) (2023.3.post1)
Requirement already satisfied: tzdata>=2022.1 in c:\users\amrendra mishra\anaconda3\lib\site-packages (from pandas>=0.25->seaborn) (2023.3)
Requirement already satisfied: six>=1.5 in c:\users\amrendra mishra\anaconda3\lib\site-packages (from python-dateutil>=2.7->matplotlib) (1.16.0)
Note: you may need to restart the kernel to use updated packages.
In [8]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Load the dataset
data = pd.read_csv("WineQT.csv")

# Create pairplot
sns.pairplot(data)
plt.show()
C:\Users\Amrendra Mishra\anaconda3\Lib\site-packages\seaborn\axisgrid.py:118: UserWarning: The figure layout has changed to tight
  self._figure.tight_layout(*args, **kwargs)
In [10]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Load the dataset
data = pd.read_csv("WineQT.csv")

# Separate features (X) and target variable (y)
X = data.drop(columns=['free sulfur dioxide'])  # Replace 'target_column' with the name of your target column
y = data['free sulfur dioxide']  # Replace 'target_column' with the name of your target column

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# You can adjust the test_size parameter to change the proportion of the dataset allocated for testing
# random_state ensures reproducibility, you can change it to any integer value or set it to None for random split

# Now X_train and y_train contain the features and target variable for training, respectively
# And X_test and y_test contain the features and target variable for testing, respectively
In [11]:
X
Out[11]:
fixed acidity volatile acidity citric acid residual sugar chlorides total sulfur dioxide density pH sulphates alcohol quality Id
0 7.4 0.700 0.00 1.9 0.076 34.0 0.99780 3.51 0.56 9.4 5 0
1 7.8 0.880 0.00 2.6 0.098 67.0 0.99680 3.20 0.68 9.8 5 1
2 7.8 0.760 0.04 2.3 0.092 54.0 0.99700 3.26 0.65 9.8 5 2
3 11.2 0.280 0.56 1.9 0.075 60.0 0.99800 3.16 0.58 9.8 6 3
4 7.4 0.700 0.00 1.9 0.076 34.0 0.99780 3.51 0.56 9.4 5 4
... ... ... ... ... ... ... ... ... ... ... ... ...
1138 6.3 0.510 0.13 2.3 0.076 40.0 0.99574 3.42 0.75 11.0 6 1592
1139 6.8 0.620 0.08 1.9 0.068 38.0 0.99651 3.42 0.82 9.5 6 1593
1140 6.2 0.600 0.08 2.0 0.090 44.0 0.99490 3.45 0.58 10.5 5 1594
1141 5.9 0.550 0.10 2.2 0.062 51.0 0.99512 3.52 0.76 11.2 6 1595
1142 5.9 0.645 0.12 2.0 0.075 44.0 0.99547 3.57 0.71 10.2 5 1597

1143 rows × 12 columns

In [12]:
y
Out[12]:
0       11.0
1       25.0
2       15.0
3       17.0
4       11.0
        ... 
1138    29.0
1139    28.0
1140    32.0
1141    39.0
1142    32.0
Name: free sulfur dioxide, Length: 1143, dtype: float64
In [13]:
X_train
Out[13]:
fixed acidity volatile acidity citric acid residual sugar chlorides total sulfur dioxide density pH sulphates alcohol quality Id
12 8.5 0.28 0.56 1.8 0.092 103.0 0.99690 3.30 0.75 10.5 7 16
758 9.9 0.32 0.56 2.0 0.073 8.0 0.99534 3.15 0.73 11.4 6 1076
636 8.9 0.31 0.36 2.6 0.056 39.0 0.99562 3.40 0.69 11.8 5 900
1109 6.6 0.88 0.04 2.2 0.066 20.0 0.99636 3.53 0.56 9.9 5 1556
743 7.6 0.42 0.25 3.9 0.104 90.0 0.99784 3.15 0.57 9.1 5 1057
... ... ... ... ... ... ... ... ... ... ... ... ...
1044 6.7 1.04 0.08 2.3 0.067 32.0 0.99648 3.52 0.57 11.0 4 1467
1095 8.0 0.39 0.30 1.9 0.074 84.0 0.99717 3.39 0.61 9.0 5 1533
1130 7.4 0.35 0.33 2.4 0.068 26.0 0.99470 3.36 0.60 11.9 6 1580
860 7.9 0.57 0.31 2.0 0.079 79.0 0.99677 3.29 0.69 9.5 6 1216
1126 7.5 0.52 0.40 2.2 0.060 20.0 0.99474 3.26 0.64 11.8 6 1575

914 rows × 12 columns

In [14]:
X_test
Out[14]:
fixed acidity volatile acidity citric acid residual sugar chlorides total sulfur dioxide density pH sulphates alcohol quality Id
158 6.8 0.610 0.04 1.5 0.057 10.0 0.99525 3.42 0.60 9.500000 5 222
1081 6.9 0.840 0.21 4.1 0.074 65.0 0.99842 3.53 0.72 9.233333 6 1514
291 7.0 0.580 0.12 1.9 0.091 124.0 0.99560 3.44 0.48 10.500000 5 417
538 7.8 0.480 0.68 1.7 0.415 32.0 0.99656 3.09 1.06 9.100000 6 754
367 12.5 0.600 0.49 4.3 0.100 14.0 1.00100 3.25 0.74 11.900000 6 516
... ... ... ... ... ... ... ... ... ... ... ... ...
66 5.0 1.020 0.04 1.4 0.045 85.0 0.99380 3.75 0.48 10.500000 4 94
328 10.3 0.500 0.42 2.0 0.069 51.0 0.99820 3.16 0.72 11.500000 6 466
67 6.8 0.775 0.00 3.0 0.102 23.0 0.99650 3.45 0.56 10.700000 5 96
231 10.0 0.490 0.20 11.0 0.071 50.0 1.00150 3.16 0.69 9.200000 6 325
966 11.6 0.475 0.40 1.4 0.091 28.0 0.99704 3.07 0.65 10.033333 6 1359

229 rows × 12 columns

In [15]:
y_train
Out[15]:
12      35.0
758      3.0
636     10.0
1109    12.0
743     28.0
        ... 
1044    19.0
1095    32.0
1130     9.0
860     10.0
1126    12.0
Name: free sulfur dioxide, Length: 914, dtype: float64
In [16]:
y_test
Out[16]:
158      5.0
1081    16.0
291     34.0
538     14.0
367      5.0
        ... 
66      41.0
328     21.0
67       8.0
231     13.0
966      6.0
Name: free sulfur dioxide, Length: 229, dtype: float64
In [17]:
X_train, X_test, y_train, y_test
Out[17]:
(      fixed acidity  volatile acidity  citric acid  residual sugar  chlorides  \
 12              8.5              0.28         0.56             1.8      0.092   
 758             9.9              0.32         0.56             2.0      0.073   
 636             8.9              0.31         0.36             2.6      0.056   
 1109            6.6              0.88         0.04             2.2      0.066   
 743             7.6              0.42         0.25             3.9      0.104   
 ...             ...               ...          ...             ...        ...   
 1044            6.7              1.04         0.08             2.3      0.067   
 1095            8.0              0.39         0.30             1.9      0.074   
 1130            7.4              0.35         0.33             2.4      0.068   
 860             7.9              0.57         0.31             2.0      0.079   
 1126            7.5              0.52         0.40             2.2      0.060   
 
       total sulfur dioxide  density    pH  sulphates  alcohol  quality    Id  
 12                   103.0  0.99690  3.30       0.75     10.5        7    16  
 758                    8.0  0.99534  3.15       0.73     11.4        6  1076  
 636                   39.0  0.99562  3.40       0.69     11.8        5   900  
 1109                  20.0  0.99636  3.53       0.56      9.9        5  1556  
 743                   90.0  0.99784  3.15       0.57      9.1        5  1057  
 ...                    ...      ...   ...        ...      ...      ...   ...  
 1044                  32.0  0.99648  3.52       0.57     11.0        4  1467  
 1095                  84.0  0.99717  3.39       0.61      9.0        5  1533  
 1130                  26.0  0.99470  3.36       0.60     11.9        6  1580  
 860                   79.0  0.99677  3.29       0.69      9.5        6  1216  
 1126                  20.0  0.99474  3.26       0.64     11.8        6  1575  
 
 [914 rows x 12 columns],
       fixed acidity  volatile acidity  citric acid  residual sugar  chlorides  \
 158             6.8             0.610         0.04             1.5      0.057   
 1081            6.9             0.840         0.21             4.1      0.074   
 291             7.0             0.580         0.12             1.9      0.091   
 538             7.8             0.480         0.68             1.7      0.415   
 367            12.5             0.600         0.49             4.3      0.100   
 ...             ...               ...          ...             ...        ...   
 66              5.0             1.020         0.04             1.4      0.045   
 328            10.3             0.500         0.42             2.0      0.069   
 67              6.8             0.775         0.00             3.0      0.102   
 231            10.0             0.490         0.20            11.0      0.071   
 966            11.6             0.475         0.40             1.4      0.091   
 
       total sulfur dioxide  density    pH  sulphates    alcohol  quality    Id  
 158                   10.0  0.99525  3.42       0.60   9.500000        5   222  
 1081                  65.0  0.99842  3.53       0.72   9.233333        6  1514  
 291                  124.0  0.99560  3.44       0.48  10.500000        5   417  
 538                   32.0  0.99656  3.09       1.06   9.100000        6   754  
 367                   14.0  1.00100  3.25       0.74  11.900000        6   516  
 ...                    ...      ...   ...        ...        ...      ...   ...  
 66                    85.0  0.99380  3.75       0.48  10.500000        4    94  
 328                   51.0  0.99820  3.16       0.72  11.500000        6   466  
 67                    23.0  0.99650  3.45       0.56  10.700000        5    96  
 231                   50.0  1.00150  3.16       0.69   9.200000        6   325  
 966                   28.0  0.99704  3.07       0.65  10.033333        6  1359  
 
 [229 rows x 12 columns],
 12      35.0
 758      3.0
 636     10.0
 1109    12.0
 743     28.0
         ... 
 1044    19.0
 1095    32.0
 1130     9.0
 860     10.0
 1126    12.0
 Name: free sulfur dioxide, Length: 914, dtype: float64,
 158      5.0
 1081    16.0
 291     34.0
 538     14.0
 367      5.0
         ... 
 66      41.0
 328     21.0
 67       8.0
 231     13.0
 966      6.0
 Name: free sulfur dioxide, Length: 229, dtype: float64)
In [18]:
X.head()
Out[18]:
fixed acidity volatile acidity citric acid residual sugar chlorides total sulfur dioxide density pH sulphates alcohol quality Id
0 7.4 0.70 0.00 1.9 0.076 34.0 0.9978 3.51 0.56 9.4 5 0
1 7.8 0.88 0.00 2.6 0.098 67.0 0.9968 3.20 0.68 9.8 5 1
2 7.8 0.76 0.04 2.3 0.092 54.0 0.9970 3.26 0.65 9.8 5 2
3 11.2 0.28 0.56 1.9 0.075 60.0 0.9980 3.16 0.58 9.8 6 3
4 7.4 0.70 0.00 1.9 0.076 34.0 0.9978 3.51 0.56 9.4 5 4
In [24]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
In [25]:
# Load the dataset
data = pd.read_csv("WineQT.csv")
In [31]:
data
Out[31]:
fixed acidity volatile acidity citric acid residual sugar chlorides free sulfur dioxide total sulfur dioxide density pH sulphates alcohol quality Id
0 7.4 0.700 0.00 1.9 0.076 11.0 34.0 0.99780 3.51 0.56 9.4 5 0
1 7.8 0.880 0.00 2.6 0.098 25.0 67.0 0.99680 3.20 0.68 9.8 5 1
2 7.8 0.760 0.04 2.3 0.092 15.0 54.0 0.99700 3.26 0.65 9.8 5 2
3 11.2 0.280 0.56 1.9 0.075 17.0 60.0 0.99800 3.16 0.58 9.8 6 3
4 7.4 0.700 0.00 1.9 0.076 11.0 34.0 0.99780 3.51 0.56 9.4 5 4
... ... ... ... ... ... ... ... ... ... ... ... ... ...
1138 6.3 0.510 0.13 2.3 0.076 29.0 40.0 0.99574 3.42 0.75 11.0 6 1592
1139 6.8 0.620 0.08 1.9 0.068 28.0 38.0 0.99651 3.42 0.82 9.5 6 1593
1140 6.2 0.600 0.08 2.0 0.090 32.0 44.0 0.99490 3.45 0.58 10.5 5 1594
1141 5.9 0.550 0.10 2.2 0.062 39.0 51.0 0.99512 3.52 0.76 11.2 6 1595
1142 5.9 0.645 0.12 2.0 0.075 32.0 44.0 0.99547 3.57 0.71 10.2 5 1597

1143 rows × 13 columns

In [26]:
# Separate features (X) and target variable (y)
X = data.drop(columns=['residual sugar'])
y = data['residual sugar']
In [32]:
X
Out[32]:
fixed acidity volatile acidity citric acid chlorides free sulfur dioxide total sulfur dioxide density pH sulphates alcohol quality Id
0 7.4 0.700 0.00 0.076 11.0 34.0 0.99780 3.51 0.56 9.4 5 0
1 7.8 0.880 0.00 0.098 25.0 67.0 0.99680 3.20 0.68 9.8 5 1
2 7.8 0.760 0.04 0.092 15.0 54.0 0.99700 3.26 0.65 9.8 5 2
3 11.2 0.280 0.56 0.075 17.0 60.0 0.99800 3.16 0.58 9.8 6 3
4 7.4 0.700 0.00 0.076 11.0 34.0 0.99780 3.51 0.56 9.4 5 4
... ... ... ... ... ... ... ... ... ... ... ... ...
1138 6.3 0.510 0.13 0.076 29.0 40.0 0.99574 3.42 0.75 11.0 6 1592
1139 6.8 0.620 0.08 0.068 28.0 38.0 0.99651 3.42 0.82 9.5 6 1593
1140 6.2 0.600 0.08 0.090 32.0 44.0 0.99490 3.45 0.58 10.5 5 1594
1141 5.9 0.550 0.10 0.062 39.0 51.0 0.99512 3.52 0.76 11.2 6 1595
1142 5.9 0.645 0.12 0.075 32.0 44.0 0.99547 3.57 0.71 10.2 5 1597

1143 rows × 12 columns

In [33]:
y
Out[33]:
0       1.9
1       2.6
2       2.3
3       1.9
4       1.9
       ... 
1138    2.3
1139    1.9
1140    2.0
1141    2.2
1142    2.0
Name: residual sugar, Length: 1143, dtype: float64
In [27]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
In [34]:
X_train
Out[34]:
fixed acidity volatile acidity citric acid chlorides free sulfur dioxide total sulfur dioxide density pH sulphates alcohol quality Id
12 8.5 0.28 0.56 0.092 35.0 103.0 0.99690 3.30 0.75 10.5 7 16
758 9.9 0.32 0.56 0.073 3.0 8.0 0.99534 3.15 0.73 11.4 6 1076
636 8.9 0.31 0.36 0.056 10.0 39.0 0.99562 3.40 0.69 11.8 5 900
1109 6.6 0.88 0.04 0.066 12.0 20.0 0.99636 3.53 0.56 9.9 5 1556
743 7.6 0.42 0.25 0.104 28.0 90.0 0.99784 3.15 0.57 9.1 5 1057
... ... ... ... ... ... ... ... ... ... ... ... ...
1044 6.7 1.04 0.08 0.067 19.0 32.0 0.99648 3.52 0.57 11.0 4 1467
1095 8.0 0.39 0.30 0.074 32.0 84.0 0.99717 3.39 0.61 9.0 5 1533
1130 7.4 0.35 0.33 0.068 9.0 26.0 0.99470 3.36 0.60 11.9 6 1580
860 7.9 0.57 0.31 0.079 10.0 79.0 0.99677 3.29 0.69 9.5 6 1216
1126 7.5 0.52 0.40 0.060 12.0 20.0 0.99474 3.26 0.64 11.8 6 1575

914 rows × 12 columns

In [35]:
X_test
Out[35]:
fixed acidity volatile acidity citric acid chlorides free sulfur dioxide total sulfur dioxide density pH sulphates alcohol quality Id
158 6.8 0.610 0.04 0.057 5.0 10.0 0.99525 3.42 0.60 9.500000 5 222
1081 6.9 0.840 0.21 0.074 16.0 65.0 0.99842 3.53 0.72 9.233333 6 1514
291 7.0 0.580 0.12 0.091 34.0 124.0 0.99560 3.44 0.48 10.500000 5 417
538 7.8 0.480 0.68 0.415 14.0 32.0 0.99656 3.09 1.06 9.100000 6 754
367 12.5 0.600 0.49 0.100 5.0 14.0 1.00100 3.25 0.74 11.900000 6 516
... ... ... ... ... ... ... ... ... ... ... ... ...
66 5.0 1.020 0.04 0.045 41.0 85.0 0.99380 3.75 0.48 10.500000 4 94
328 10.3 0.500 0.42 0.069 21.0 51.0 0.99820 3.16 0.72 11.500000 6 466
67 6.8 0.775 0.00 0.102 8.0 23.0 0.99650 3.45 0.56 10.700000 5 96
231 10.0 0.490 0.20 0.071 13.0 50.0 1.00150 3.16 0.69 9.200000 6 325
966 11.6 0.475 0.40 0.091 6.0 28.0 0.99704 3.07 0.65 10.033333 6 1359

229 rows × 12 columns

In [36]:
y_train
Out[36]:
12      1.8
758     2.0
636     2.6
1109    2.2
743     3.9
       ... 
1044    2.3
1095    1.9
1130    2.4
860     2.0
1126    2.2
Name: residual sugar, Length: 914, dtype: float64
In [37]:
y_test
Out[37]:
158      1.5
1081     4.1
291      1.9
538      1.7
367      4.3
        ... 
66       1.4
328      2.0
67       3.0
231     11.0
966      1.4
Name: residual sugar, Length: 229, dtype: float64
In [38]:
X_train, X_test, y_train, y_test
Out[38]:
(      fixed acidity  volatile acidity  citric acid  chlorides  \
 12              8.5              0.28         0.56      0.092   
 758             9.9              0.32         0.56      0.073   
 636             8.9              0.31         0.36      0.056   
 1109            6.6              0.88         0.04      0.066   
 743             7.6              0.42         0.25      0.104   
 ...             ...               ...          ...        ...   
 1044            6.7              1.04         0.08      0.067   
 1095            8.0              0.39         0.30      0.074   
 1130            7.4              0.35         0.33      0.068   
 860             7.9              0.57         0.31      0.079   
 1126            7.5              0.52         0.40      0.060   
 
       free sulfur dioxide  total sulfur dioxide  density    pH  sulphates  \
 12                   35.0                 103.0  0.99690  3.30       0.75   
 758                   3.0                   8.0  0.99534  3.15       0.73   
 636                  10.0                  39.0  0.99562  3.40       0.69   
 1109                 12.0                  20.0  0.99636  3.53       0.56   
 743                  28.0                  90.0  0.99784  3.15       0.57   
 ...                   ...                   ...      ...   ...        ...   
 1044                 19.0                  32.0  0.99648  3.52       0.57   
 1095                 32.0                  84.0  0.99717  3.39       0.61   
 1130                  9.0                  26.0  0.99470  3.36       0.60   
 860                  10.0                  79.0  0.99677  3.29       0.69   
 1126                 12.0                  20.0  0.99474  3.26       0.64   
 
       alcohol  quality    Id  
 12       10.5        7    16  
 758      11.4        6  1076  
 636      11.8        5   900  
 1109      9.9        5  1556  
 743       9.1        5  1057  
 ...       ...      ...   ...  
 1044     11.0        4  1467  
 1095      9.0        5  1533  
 1130     11.9        6  1580  
 860       9.5        6  1216  
 1126     11.8        6  1575  
 
 [914 rows x 12 columns],
       fixed acidity  volatile acidity  citric acid  chlorides  \
 158             6.8             0.610         0.04      0.057   
 1081            6.9             0.840         0.21      0.074   
 291             7.0             0.580         0.12      0.091   
 538             7.8             0.480         0.68      0.415   
 367            12.5             0.600         0.49      0.100   
 ...             ...               ...          ...        ...   
 66              5.0             1.020         0.04      0.045   
 328            10.3             0.500         0.42      0.069   
 67              6.8             0.775         0.00      0.102   
 231            10.0             0.490         0.20      0.071   
 966            11.6             0.475         0.40      0.091   
 
       free sulfur dioxide  total sulfur dioxide  density    pH  sulphates  \
 158                   5.0                  10.0  0.99525  3.42       0.60   
 1081                 16.0                  65.0  0.99842  3.53       0.72   
 291                  34.0                 124.0  0.99560  3.44       0.48   
 538                  14.0                  32.0  0.99656  3.09       1.06   
 367                   5.0                  14.0  1.00100  3.25       0.74   
 ...                   ...                   ...      ...   ...        ...   
 66                   41.0                  85.0  0.99380  3.75       0.48   
 328                  21.0                  51.0  0.99820  3.16       0.72   
 67                    8.0                  23.0  0.99650  3.45       0.56   
 231                  13.0                  50.0  1.00150  3.16       0.69   
 966                   6.0                  28.0  0.99704  3.07       0.65   
 
         alcohol  quality    Id  
 158    9.500000        5   222  
 1081   9.233333        6  1514  
 291   10.500000        5   417  
 538    9.100000        6   754  
 367   11.900000        6   516  
 ...         ...      ...   ...  
 66    10.500000        4    94  
 328   11.500000        6   466  
 67    10.700000        5    96  
 231    9.200000        6   325  
 966   10.033333        6  1359  
 
 [229 rows x 12 columns],
 12      1.8
 758     2.0
 636     2.6
 1109    2.2
 743     3.9
        ... 
 1044    2.3
 1095    1.9
 1130    2.4
 860     2.0
 1126    2.2
 Name: residual sugar, Length: 914, dtype: float64,
 158      1.5
 1081     4.1
 291      1.9
 538      1.7
 367      4.3
         ... 
 66       1.4
 328      2.0
 67       3.0
 231     11.0
 966      1.4
 Name: residual sugar, Length: 229, dtype: float64)
In [28]:
# Create and train the Linear Regression model
model = LinearRegression()
model.fit(X_train, y_train)
Out[28]:
LinearRegression()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
LinearRegression()
In [39]:
model
Out[39]:
LinearRegression()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
LinearRegression()
In [29]:
# Predict on the testing set
y_pred = model.predict(X_test)
In [40]:
y_pred
Out[40]:
array([0.85541521, 2.92111251, 2.63536686, 1.92619682, 4.331679  ,
       1.75598103, 1.94732319, 2.36361949, 2.91919823, 3.23236467,
       5.15313611, 2.45247144, 3.71430601, 1.99506139, 1.31946845,
       3.34649716, 2.17391224, 2.47120965, 1.506877  , 2.79426165,
       2.20532127, 4.70751107, 1.55147743, 3.22462835, 2.71968141,
       2.42767196, 1.43147484, 2.40167697, 1.88174965, 2.00613936,
       0.99549929, 2.67300205, 2.31174015, 3.27338509, 3.85049301,
       2.49072281, 4.09469182, 1.95046287, 1.43350157, 3.03271183,
       1.91064408, 2.23091637, 1.91110204, 1.7637472 , 2.75759394,
       1.81628855, 2.64938726, 3.94136753, 2.55747901, 2.62551073,
       2.73428007, 1.16220816, 2.1234848 , 3.62221452, 2.16403308,
       2.80646819, 3.48760372, 3.46626603, 1.38440336, 2.21042875,
       3.5384896 , 2.16190474, 2.86238898, 1.51511526, 2.29287929,
       2.34895994, 2.56619979, 1.79924575, 1.8999282 , 3.29620955,
       1.63179561, 2.27730585, 3.57597294, 2.57413658, 0.79618097,
       2.95683765, 3.71201254, 2.18358794, 2.03938304, 2.70395737,
       2.56943907, 1.74233346, 1.66961944, 2.17169633, 2.4947307 ,
       3.24701787, 1.59514234, 2.5149015 , 2.52151378, 2.09963638,
       1.62754687, 3.54913462, 1.49919553, 2.96006375, 2.55367904,
       2.6988049 , 2.2181423 , 1.36207501, 5.69745192, 2.17364486,
       1.62983294, 0.57313497, 3.29837452, 2.6048344 , 0.93686778,
       2.53168973, 2.69875086, 4.30194818, 2.39567333, 2.60893018,
       2.68118207, 3.82093662, 2.76493366, 1.9390335 , 2.79788398,
       3.90940035, 3.24328196, 4.96162822, 2.04815173, 3.41905473,
       2.10018356, 1.96929527, 3.78063351, 2.19470648, 1.65359734,
       2.39730946, 2.99705226, 2.27067671, 6.74553205, 5.55676218,
       3.04821391, 5.7208383 , 4.02036856, 2.40655335, 3.13728661,
       1.68318852, 1.56006356, 1.90094849, 2.40072525, 2.00165814,
       2.8853239 , 3.6623974 , 2.03207865, 2.34344196, 4.88695374,
       2.37672362, 2.65805255, 2.19605287, 1.2830147 , 2.34856352,
       1.75110229, 2.459089  , 1.57307914, 4.47397795, 4.47888361,
       3.71105471, 2.19609707, 3.04600698, 3.21016267, 1.56594803,
       3.16449441, 2.65654139, 2.17192873, 1.81554657, 3.44116137,
       1.46323177, 2.16778798, 1.91420767, 2.07087609, 3.70442977,
       1.87853691, 2.13720793, 1.92701549, 3.40430645, 2.26611668,
       2.66917264, 1.33530893, 4.08564501, 3.5392085 , 2.60315255,
       1.46113111, 1.81873912, 2.93226707, 1.98074507, 3.09248022,
       1.97618361, 1.3009186 , 2.72756022, 1.94760998, 0.86886414,
       2.13950893, 2.79460457, 2.22130922, 1.9196715 , 2.44655563,
       2.13907611, 2.66854952, 2.8499266 , 1.70700774, 3.24633934,
       3.86595139, 2.77518389, 3.27114474, 2.34880886, 1.56091779,
       4.41940545, 2.79219165, 2.89449021, 1.8158406 , 1.78913024,
       1.28541397, 2.97242352, 2.92343271, 2.99734577, 2.61700827,
       2.65152295, 5.69172007, 1.92005135, 2.3799246 , 2.28626639,
       3.43671122, 1.47179666, 2.52798902, 2.531343  , 1.20638151,
       3.92795849, 2.8448967 , 4.71854193, 1.0577258 ])
In [30]:
# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)
Mean Squared Error: 1.2503677493687742
In [41]:
mse
Out[41]:
1.2503677493687742
In [ ]:
# K-FOLD CROSS VALIDATION
In [43]:
import pandas as pd
from sklearn.model_selection import KFold
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
In [44]:
# Load the dataset
wine_data = pd.read_csv("wineQT.csv")
In [45]:
# Separate features (X) and target variable (y)
X = wine_data.drop(columns=['quality'])
y = wine_data['quality']
In [46]:
X
Out[46]:
fixed acidity volatile acidity citric acid residual sugar chlorides free sulfur dioxide total sulfur dioxide density pH sulphates alcohol Id
0 7.4 0.700 0.00 1.9 0.076 11.0 34.0 0.99780 3.51 0.56 9.4 0
1 7.8 0.880 0.00 2.6 0.098 25.0 67.0 0.99680 3.20 0.68 9.8 1
2 7.8 0.760 0.04 2.3 0.092 15.0 54.0 0.99700 3.26 0.65 9.8 2
3 11.2 0.280 0.56 1.9 0.075 17.0 60.0 0.99800 3.16 0.58 9.8 3
4 7.4 0.700 0.00 1.9 0.076 11.0 34.0 0.99780 3.51 0.56 9.4 4
... ... ... ... ... ... ... ... ... ... ... ... ...
1138 6.3 0.510 0.13 2.3 0.076 29.0 40.0 0.99574 3.42 0.75 11.0 1592
1139 6.8 0.620 0.08 1.9 0.068 28.0 38.0 0.99651 3.42 0.82 9.5 1593
1140 6.2 0.600 0.08 2.0 0.090 32.0 44.0 0.99490 3.45 0.58 10.5 1594
1141 5.9 0.550 0.10 2.2 0.062 39.0 51.0 0.99512 3.52 0.76 11.2 1595
1142 5.9 0.645 0.12 2.0 0.075 32.0 44.0 0.99547 3.57 0.71 10.2 1597

1143 rows × 12 columns

In [47]:
y
Out[47]:
0       5
1       5
2       5
3       6
4       5
       ..
1138    6
1139    6
1140    5
1141    6
1142    5
Name: quality, Length: 1143, dtype: int64
In [ ]:
# Define the number of folds for cross-validation
k = 5
# Initialize KFold object
kf = KFold(n_splits=k, shuffle=True, random_state=42)
In [48]:
kf
Out[48]:
KFold(n_splits=5, random_state=42, shuffle=True)
In [49]:
# Initialize lists to store the evaluation metrics
mse_scores = []
In [50]:
mse_scores
Out[50]:
[]
In [ ]:
# Iterate over each fold
for train_index, test_index in kf.split(X):
    # Split data into training and testing sets
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]
In [51]:
X_train, X_test
Out[51]:
(      fixed acidity  volatile acidity  citric acid  residual sugar  chlorides  \
 0               7.4              0.70         0.00             1.9      0.076   
 2               7.8              0.76         0.04             2.3      0.092   
 3              11.2              0.28         0.56             1.9      0.075   
 4               7.4              0.70         0.00             1.9      0.076   
 5               7.4              0.66         0.00             1.8      0.075   
 ...             ...               ...          ...             ...        ...   
 1137            5.4              0.74         0.09             1.7      0.089   
 1138            6.3              0.51         0.13             2.3      0.076   
 1139            6.8              0.62         0.08             1.9      0.068   
 1140            6.2              0.60         0.08             2.0      0.090   
 1141            5.9              0.55         0.10             2.2      0.062   
 
       free sulfur dioxide  total sulfur dioxide  density    pH  sulphates  \
 0                    11.0                  34.0  0.99780  3.51       0.56   
 2                    15.0                  54.0  0.99700  3.26       0.65   
 3                    17.0                  60.0  0.99800  3.16       0.58   
 4                    11.0                  34.0  0.99780  3.51       0.56   
 5                    13.0                  40.0  0.99780  3.51       0.56   
 ...                   ...                   ...      ...   ...        ...   
 1137                 16.0                  26.0  0.99402  3.67       0.56   
 1138                 29.0                  40.0  0.99574  3.42       0.75   
 1139                 28.0                  38.0  0.99651  3.42       0.82   
 1140                 32.0                  44.0  0.99490  3.45       0.58   
 1141                 39.0                  51.0  0.99512  3.52       0.76   
 
       alcohol    Id  
 0         9.4     0  
 2         9.8     2  
 3         9.8     3  
 4         9.4     4  
 5         9.4     5  
 ...       ...   ...  
 1137     11.6  1591  
 1138     11.0  1592  
 1139      9.5  1593  
 1140     10.5  1594  
 1141     11.2  1595  
 
 [915 rows x 12 columns],
       fixed acidity  volatile acidity  citric acid  residual sugar  chlorides  \
 1               7.8             0.880         0.00             2.6      0.098   
 13              7.9             0.320         0.51             1.8      0.341   
 14              7.6             0.390         0.31             2.3      0.082   
 20              7.1             0.710         0.00             1.9      0.080   
 21              7.8             0.645         0.00             2.0      0.082   
 ...             ...               ...          ...             ...        ...   
 1126            7.5             0.520         0.40             2.2      0.060   
 1127            8.0             0.300         0.63             1.6      0.081   
 1130            7.4             0.350         0.33             2.4      0.068   
 1135            5.8             0.610         0.11             1.8      0.066   
 1142            5.9             0.645         0.12             2.0      0.075   
 
       free sulfur dioxide  total sulfur dioxide  density    pH  sulphates  \
 1                    25.0                  67.0  0.99680  3.20       0.68   
 13                   17.0                  56.0  0.99690  3.04       1.08   
 14                   23.0                  71.0  0.99820  3.52       0.65   
 20                   14.0                  35.0  0.99720  3.47       0.55   
 21                    8.0                  16.0  0.99640  3.38       0.59   
 ...                   ...                   ...      ...   ...        ...   
 1126                 12.0                  20.0  0.99474  3.26       0.64   
 1127                 16.0                  29.0  0.99588  3.30       0.78   
 1130                  9.0                  26.0  0.99470  3.36       0.60   
 1135                 18.0                  28.0  0.99483  3.55       0.66   
 1142                 32.0                  44.0  0.99547  3.57       0.71   
 
       alcohol    Id  
 1         9.8     1  
 13        9.2    19  
 14        9.7    21  
 20        9.4    28  
 21        9.8    29  
 ...       ...   ...  
 1126     11.8  1575  
 1127     10.8  1576  
 1130     11.9  1580  
 1135     10.9  1587  
 1142     10.2  1597  
 
 [228 rows x 12 columns])
In [52]:
y_train, y_test
Out[52]:
(0       5
 2       5
 3       6
 4       5
 5       5
        ..
 1137    6
 1138    6
 1139    6
 1140    5
 1141    6
 Name: quality, Length: 915, dtype: int64,
 1       5
 13      6
 14      5
 20      5
 21      6
        ..
 1126    6
 1127    6
 1130    6
 1135    6
 1142    5
 Name: quality, Length: 228, dtype: int64)
In [54]:
# Initialize and train the model
model = LinearRegression()
model.fit(X_train, y_train)
Out[54]:
LinearRegression()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
LinearRegression()
In [55]:
 # Make predictions on the test set
y_pred = model.predict(X_test)
In [56]:
y_pred 
Out[56]:
array([5.16186431, 5.43479564, 5.4099847 , 5.04619825, 5.37704568,
       5.7990418 , 5.38731241, 5.43093026, 5.35588733, 5.29421816,
       6.63569715, 5.62679956, 5.00725958, 4.79458272, 6.24309063,
       5.37429889, 6.97932141, 5.70767983, 5.47693236, 5.85157399,
       5.08360332, 4.97385585, 5.227863  , 5.40778371, 5.6873337 ,
       5.0492026 , 6.05221991, 6.02449667, 5.55668676, 4.9799088 ,
       6.84811605, 5.61450834, 5.87182838, 5.4222998 , 5.78893723,
       5.91544362, 5.69223483, 4.85279719, 5.2761029 , 5.5057901 ,
       6.17136137, 6.26883163, 5.70079611, 5.70076378, 5.2563155 ,
       6.22684128, 6.10755136, 6.84600024, 5.2940607 , 5.78932803,
       5.74935871, 5.96072529, 5.17025127, 5.16986734, 5.17022067,
       5.57877849, 5.96387974, 5.49275985, 6.63407454, 5.77748158,
       5.39598662, 5.70716412, 6.55672665, 5.33760978, 5.77942747,
       5.50962442, 6.12335577, 5.35827673, 6.10912862, 5.44154945,
       5.88810051, 6.25332822, 6.00611659, 6.06743437, 6.40275454,
       5.96543164, 5.17794207, 4.73384219, 6.05276512, 5.3526892 ,
       4.85885224, 5.16134159, 5.29722373, 5.27316423, 6.02841554,
       5.57243444, 5.44138746, 5.41448582, 5.75515053, 5.64434483,
       5.16386656, 6.06867888, 4.77733117, 6.0163159 , 5.07399768,
       5.59226689, 5.31554421, 4.7729377 , 5.16219757, 5.18661082,
       5.55869181, 4.83932641, 5.15024736, 5.41985256, 5.49618322,
       5.4961509 , 5.53854026, 6.47043781, 6.18676984, 5.38664136,
       5.42814432, 5.9855998 , 6.1910727 , 6.42648999, 5.4435614 ,
       4.77444262, 5.61816966, 5.58577652, 5.74073933, 5.80766136,
       5.68411174, 6.33403541, 6.45014122, 5.17709068, 5.6430476 ,
       6.29329723, 5.85728149, 6.5493569 , 5.80993138, 4.88902914,
       6.36817438, 4.89424114, 5.84031068, 5.26049131, 5.4497633 ,
       6.18365773, 5.91343057, 6.39614328, 5.016204  , 6.63169597,
       5.64698016, 6.21235087, 6.11561583, 6.5747321 , 6.09800881,
       6.33180473, 6.38375643, 6.48389748, 5.28843156, 6.73248582,
       6.40585391, 6.69928591, 6.21667497, 6.3795492 , 5.57881334,
       5.25549336, 5.24929646, 5.37188191, 6.3530185 , 6.04638406,
       6.02085125, 5.38647511, 5.79402117, 6.0916635 , 5.12719094,
       5.47277288, 6.37687869, 5.38888697, 6.18555351, 5.25150491,
       6.20165538, 6.51934611, 5.38417076, 5.12845699, 5.36505494,
       5.19289557, 5.28341534, 5.01096604, 5.15909701, 5.62492582,
       5.62489349, 5.62479651, 5.67433823, 5.90380965, 5.53311896,
       5.73321606, 4.98641665, 4.81652005, 5.13260822, 5.43416856,
       5.28721649, 5.1511722 , 6.47609733, 6.92311286, 5.37636023,
       5.97669435, 5.72890941, 5.44585044, 5.764088  , 5.4827883 ,
       4.92087612, 5.34527458, 6.68606406, 5.38025643, 5.14729562,
       5.57927144, 6.08723916, 5.04054407, 5.30379653, 6.56058845,
       6.36308623, 5.68673533, 4.8270962 , 5.24619146, 5.53831323,
       5.70246751, 5.20148276, 4.77529309, 5.13248835, 5.13245602,
       5.35318881, 5.83844359, 6.07998464, 6.16051537, 6.09330422,
       6.26019292, 5.65303901, 5.39817093])
In [ ]:
 # Calculate mean squared error
mse = mean_squared_error(y_test, y_pred)
In [57]:
mse
Out[57]:
0.3776269060991377
In [58]:
# Append the mse to the list of scores
mse_scores.append(mse)
In [59]:
mse_scores
Out[59]:
[0.3776269060991377]
In [60]:
# Calculate the average mean squared error
avg_mse = sum(mse_scores) / k
print("Average MSE:", avg_mse)
Average MSE: 0.07552538121982753
In [61]:
avg_mse
Out[61]:
0.07552538121982753
In [65]:
import pandas as pd

# Load the dataset
wine_data = pd.read_csv("wineQT.csv")

# Check the column names in the dataset
print(wine_data.columns)

# Assuming 'type' column is present, perform one-hot encoding on the 'type' column
if 'type' in wine_data.columns:
    wine_data_encoded = pd.get_dummies(wine_data, columns=['type'])

    # Display the first few rows of the encoded dataset
    print(wine_data_encoded.head())
else:
    print("Column 'type' not found in the dataset.")
Index(['fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar',
       'chlorides', 'free sulfur dioxide', 'total sulfur dioxide', 'density',
       'pH', 'sulphates', 'alcohol', 'quality', 'Id'],
      dtype='object')
Column 'type' not found in the dataset.
In [66]:
wine_data
Out[66]:
fixed acidity volatile acidity citric acid residual sugar chlorides free sulfur dioxide total sulfur dioxide density pH sulphates alcohol quality Id
0 7.4 0.700 0.00 1.9 0.076 11.0 34.0 0.99780 3.51 0.56 9.4 5 0
1 7.8 0.880 0.00 2.6 0.098 25.0 67.0 0.99680 3.20 0.68 9.8 5 1
2 7.8 0.760 0.04 2.3 0.092 15.0 54.0 0.99700 3.26 0.65 9.8 5 2
3 11.2 0.280 0.56 1.9 0.075 17.0 60.0 0.99800 3.16 0.58 9.8 6 3
4 7.4 0.700 0.00 1.9 0.076 11.0 34.0 0.99780 3.51 0.56 9.4 5 4
... ... ... ... ... ... ... ... ... ... ... ... ... ...
1138 6.3 0.510 0.13 2.3 0.076 29.0 40.0 0.99574 3.42 0.75 11.0 6 1592
1139 6.8 0.620 0.08 1.9 0.068 28.0 38.0 0.99651 3.42 0.82 9.5 6 1593
1140 6.2 0.600 0.08 2.0 0.090 32.0 44.0 0.99490 3.45 0.58 10.5 5 1594
1141 5.9 0.550 0.10 2.2 0.062 39.0 51.0 0.99512 3.52 0.76 11.2 6 1595
1142 5.9 0.645 0.12 2.0 0.075 32.0 44.0 0.99547 3.57 0.71 10.2 5 1597

1143 rows × 13 columns

In [ ]:
# performing predictions on wine dataset 
In [62]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Load the dataset
wine_data = pd.read_csv("wineQT.csv")

# Separate features (X) and target variable (y)
X = wine_data.drop(columns=['quality'])
y = wine_data['quality']

# Split the data into training and testing sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and train the linear regression model
model = LinearRegression()
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Calculate mean squared error
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)
Mean Squared Error: 0.38242835212919646
In [63]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

# Load the dataset
wine_data = pd.read_csv("wineQT.csv")

# Separate features (X) and target variable (y)
X = wine_data.drop(columns=['quality'])
y = wine_data['quality']

# Split the data into training and testing sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and train the linear regression model
model = LinearRegression()
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Calculate R-squared
r_squared = r2_score(y_test, y_pred)
print("R-squared:", r_squared)
R-squared: 0.3127638539508196
In [ ]:
 
In [ ]:
 
In [ ]: